# Import Libraries
library(knitr)
library(psych)
library(kableExtra)
library(tidyquant)
library(quantmod)
library(tseries)
library(tidyverse)
library(GGally)
library(ggplot2)
library(fpp2)
library(dplyr)
library(quantmod)
library(astsa)
library(NTS)
library(rugarch)
library(forecast)
# Import Dataset
# Download daily NFLX OHLCV data from Yahoo Finance; per the head()/tail()
# output below the sample runs 2016-11-15 through 2021-11-15 (1259 rows).
# NOTE(review): with auto.assign = FALSE the object is returned directly, so
# the `env` argument is unused; `src = 'yahoo'` is redundant for
# getSymbols.yahoo — confirm before removing.
nflx_df = getSymbols.yahoo(Symbols = 'NFLX' , env = .GlobalEnv, src = 'yahoo', from = '2016-11-15', to = '2021-11-16', auto.assign = FALSE)
head(nflx_df)
## NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume NFLX.Adjusted
## 2016-11-15 114.55 116.41 113.09 113.59 7445100 113.59
## 2016-11-16 112.96 116.12 111.81 115.19 5933700 115.19
## 2016-11-17 115.13 116.81 113.56 115.03 6232700 115.03
## 2016-11-18 115.73 116.42 113.51 115.21 6746800 115.21
## 2016-11-21 116.20 118.72 116.19 117.96 7064500 117.96
## 2016-11-22 118.32 119.46 116.98 118.04 7007200 118.04
tail(nflx_df)
## NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume NFLX.Adjusted
## 2021-11-08 650.29 656.00 643.79 651.45 2887500 651.45
## 2021-11-09 653.70 660.50 650.52 655.99 2415600 655.99
## 2021-11-10 653.01 660.33 642.11 646.91 2405800 646.91
## 2021-11-11 650.24 665.82 649.71 657.58 2868300 657.58
## 2021-11-12 660.01 683.34 653.82 682.61 4192700 682.61
## 2021-11-15 681.24 685.26 671.49 679.33 2872200 679.33
# Create new dataframe to preserve original dataset and assign new variable.
nflx <- nflx_df
# Check for Missing Variables
# Column-wise NA counts; all zeros in the output below means no missing data.
colSums(is.na(nflx))
## NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume
## 0 0 0 0 0
## NFLX.Adjusted
## 0
# Check structure to determine the shape and data types of the data set
# (per the output: an xts object of 1259 daily rows x 6 numeric columns).
str(nflx)
## An 'xts' object on 2016-11-15/2021-11-15 containing:
## Data: num [1:1259, 1:6] 115 113 115 116 116 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:6] "NFLX.Open" "NFLX.High" "NFLX.Low" "NFLX.Close" ...
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## List of 2
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2021-12-05 21:53:55"
Moving forward, as specified in the exploratory data analysis section of the paper, research shows that the adjusted closing price is the most accurate reflection of the true value of a stock. On most stock price websites that do not disclose both the closing and adjusted closing prices, the closing price shown is the adjusted price. Based on this, NFLX.Adjusted and the dates will be used to predict stock prices.
# New dataframe to select only the adjusted close
# quantmod::Ad() extracts the NFLX.Adjusted column from the OHLCV xts object.
adj_nflx = Ad(nflx)
head(adj_nflx)
## NFLX.Adjusted
## 2016-11-15 113.59
## 2016-11-16 115.19
## 2016-11-17 115.03
## 2016-11-18 115.21
## 2016-11-21 117.96
## 2016-11-22 118.04
summary(adj_nflx)
## Index NFLX.Adjusted
## Min. :2016-11-15 Min. :113.6
## 1st Qu.:2018-02-15 1st Qu.:262.9
## Median :2019-05-20 Median :348.5
## Mean :2019-05-18 Mean :351.1
## 3rd Qu.:2020-08-17 3rd Qu.:485.1
## Max. :2021-11-15 Max. :690.3
The summary shows that within the span of five years, Netflix's stock price fluctuated over a range of 576.7 (from a minimum of 113.6 to a maximum of 690.3), with an average of around 351.1.
# Stack two panels: traded volume (top) and adjusted close (bottom); titles
# added via mtext() because main = NULL suppresses the default plot title.
par(mfrow=c(2,1))
plot(nflx$NFLX.Volume, type = 'l', ylab = 'Volume', main = NULL, cex.axis = .5)
mtext(side = 3, text = 'Volume of Netflix Stock Over Time', line = 1.5, cex = 1, font = 2)
plot(adj_nflx, type = 'l', ylab = 'Close Prices', main = NULL, cex.axis = .5)
mtext(side = 3, text = 'Close Prices over Time', line = 1.5, cex = 1, font = 2)
ADF (Augmented Dickey-Fuller) Test. Null hypothesis H0: the time series is non-stationary. Alternative hypothesis HA: the time series is stationary. If the p-value is less than the significance level (0.05), we reject the null hypothesis and conclude that the time series is stationary. Since the p-value is 0.5738, we fail to reject the null hypothesis, indicating that this time series is non-stationary.
print(adf.test(adj_nflx))
##
## Augmented Dickey-Fuller Test
##
## data: adj_nflx
## Dickey-Fuller = -2.0107, Lag order = 10, p-value = 0.5738
## alternative hypothesis: stationary
Differencing
# First difference of the log series, i.e. daily log returns.
adf_diff = diff(log(adj_nflx), lag = 1)
# Drop the leading NA created by differencing.
# FIX: the original used na.locf(adf_diff, na.rm = TRUE, fromLast = TRUE),
# which back-fills the NA with the NEXT observation — duplicating the first
# log return and slightly biasing the low-lag ACF/PACF computed below.
# Removing the undefined first value is the standard treatment.
adf_diff = na.omit(adf_diff)
# Visual check that the log-return series looks stationary around zero.
plot(adf_diff, main = NULL)
mtext(side = 3, text = 'Logged Difference', line = 1.5, cex = 1, font = 2)
ACF (Autocorrelation Function)
# ACF
# Sample autocorrelations of the log-return series (also draws the plot).
acf_nflx = acf(adf_diff, main = 'ACF')
PACF (Partial Autocorrelation Function)
# PACF
# Sample partial autocorrelations of the log-return series.
pacf_nflx = pacf(adf_diff, main = 'PACF')
Adjusted Closing Price vs Difference
The adjusted closing price shows a relatively steady rise as time increases. When taking the difference, there are some spikes within the data. Because of this, we will take the logged difference to see if there is less variance.
# Compare the raw adjusted close (top) with its first difference (bottom).
par(mfrow = c(2,1))
plot(adj_nflx, main = 'Adjusted Closing Price', col = 'cornflowerblue')
plot(diff(adj_nflx), main = 'Difference Closing Price', col = 'cornflowerblue')
Decomposition of Additive Time Series. This plot shows a steady rise in the trend. The seasonal component also appears to show a recurring pattern.
# Decomposition of Additive Time Series
# Convert the adjusted close to a ts object with a 365-observation "year" so
# decompose() can split it into trend + seasonal + remainder components.
# BUG FIX: the original wrote start = 2015-11-15, which R evaluates as the
# arithmetic expression 2015 - 11 - 15 = 1989 (a meaningless start year).
# The sample actually begins on 2016-11-15, i.e. day 320 of 2016 at
# frequency 365, so the correct start is c(2016, 320).
nflx_ts = ts(adj_nflx, frequency = 365, start = c(2016, 320))
nflx_de = decompose(nflx_ts)
plot(nflx_de)
Logged Difference
The graphs show that the logged difference shows a more balanced variance. Thus, we will explore log_diff for modeling.
Observation/Note: Volatility is observed even after differencing.
# Calculate the First Degree Difference
# Three variance-stabilizing candidates: raw, log, and square-root differences.
# NOTE(review): `diff` here shadows base::diff. Later calls like diff(diff)
# still work because R resolves function calls separately from data objects,
# but renaming this variable (e.g. diff1) would be safer.
diff = diff(adj_nflx)
log_diff = diff(log(adj_nflx))
sqrt_diff = diff(sqrt(adj_nflx))
# Plot the three transformed series stacked for visual comparison of variance.
par(mfrow = c(3,1))
plot(diff, type = 'l', xlab = 'Time', ylab = 'Difference', main = 'First Degree Difference - Raw Data')
plot(log_diff, type = 'l', xlab = 'Time', ylab = 'Logged Difference', main = 'First Degree Difference - Logged Data')
plot(sqrt_diff, type = 'l', xlab = 'Time', ylab = 'Square Root Difference', main = 'First Degree Difference - Square-Root Data')
ACF First Degree Difference
# ACF/PACF of the square-root first differences; na.action = na.pass lets the
# functions tolerate the leading NA produced by diff().
par(mfrow = c(2,1))
acf(sqrt_diff, main = 'Autocorrelation Function of First Differences', na.action = na.pass)
pacf(sqrt_diff, lag.max = 31, main = 'Partial Autocorrelation Function of First Differences', na.action = na.pass)
Second Degree Differencing
Logged difference with second degree shows a more balanced variance. Thus, we will use log_diff2 for modeling.
# Calculate the Second Degree Difference
# Difference each first-difference series once more (note diff(diff) works
# despite the variable named `diff`; see the shadowing note where it is made).
diff2 = diff(diff)
log_diff2 = diff(log_diff)
sqrt_diff2 = diff(sqrt_diff)
# Stack the three second-difference series for visual comparison.
par(mfrow = c(3,1))
plot(diff2, type = 'l', xlab = 'Time', ylab = '2nd Diff', main = 'Second Degree Difference - Raw Data')
plot(log_diff2, type = 'l', xlab = 'Time', ylab = '2nd Log Diff', main = 'Second Degree Difference - Logged Data')
plot(sqrt_diff2, type = 'l', xlab = 'Time', ylab = '2nd Diff - Square-Root', main = 'Second Degree Difference - Square-Root Data')
# Compare raw and log-scaled price levels: the log panel shows growth as
# roughly linear, motivating the log transform used in the models.
par(mfrow = c(2, 1))
# FIX: the original y-axis labels said 'QEPS' / 'log(QEPS)' (quarterly
# earnings per share, apparently copied from a textbook example); these
# panels plot stock prices, so label them accordingly.
plot(nflx, ylab = 'Price', type = 'o', col = 4, cex = .5, main = NULL)
mtext(side = 3, text = 'Stock Price Growth', line = 1.5, cex = 1, font = 2)
plot(log(nflx), ylab = 'log(Price)', type = 'o', col = 4, cex = .5, main = NULL)
Per EDA process, it’s noted that differencing with logging showed better stationarity. Thus, we will be using log_diff for first differencing and log_diff2 for second differencing for our models.
Parameters for the models will be chosen by ACF and PACF plots.
The models with different parameters will be tested. In addition, auto.arima will be tested for choosing the best models.
Diagnostic measures will be performed to see the residual’s correlation. Since we have noticed high volatility in the EDA process, we will move on to GARCH models to confirm the volatility of Netflix stock prices.
MODELS FOR FIRST DEGREE DIFFERENCING
ACF and PACF of diff_log
Per ACF, spikes at 1, 5, and 8. PACF tapers to zero. Thus, let’s try MA(1).
# Logged first difference used throughout the modeling section; its ACF/PACF
# guide the MA order choice discussed in the surrounding text.
FirstDiff = diff(log(adj_nflx))
par(mfrow=c(2,1))
acf(FirstDiff, na.action = na.pass, main ='ACF Plot of First Differencing', plot = TRUE)
pacf(FirstDiff, na.action = na.pass, main = NULL)
MODELS FOR SECOND DEGREE DIFFERENCING
ACF was cut off at lag 1. PACF tailed off. Let’s try MA(1)
# ACF/PACF of the second logged difference to guide MA order selection
# for the d = 2 model.
par(mfrow=c(2,1))
acf(diff(FirstDiff), na.action = na.pass, main ='ACF Plot of Second Differencing')
pacf(diff(FirstDiff), na.action = na.pass, main = NULL)
Now, we assigned the models’ names by different parameters.
Model1 = MA(1) for First Differencing Model2 = MA(1) for Second Differencing Model3 = auto.arima for Differencing for Logged Values
# Model1 = MA(1) for First Differencing
# astsa::sarima fits ARIMA(p=0, d=1, q=1) on log prices and prints optimizer
# trace plus diagnostic plots.
Model1 = sarima((log(adj_nflx)), 0,1,1)
## initial value -3.733756
## iter 2 value -3.736647
## iter 3 value -3.736667
## iter 3 value -3.736667
## iter 3 value -3.736667
## final value -3.736667
## converged
## initial value -3.736665
## iter 1 value -3.736665
## final value -3.736665
## converged
Model1
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ma1 constant
## -0.0732 0.0014
## s.e. 0.0269 0.0006
##
## sigma^2 estimated as 0.000568: log likelihood = 2915.7, aic = -5825.4
##
## $degrees_of_freedom
## [1] 1256
##
## $ttable
## Estimate SE t.value p.value
## ma1 -0.0732 0.0269 -2.7208 0.0066
## constant 0.0014 0.0006 2.2801 0.0228
##
## $AIC
## [1] -4.630684
##
## $AICc
## [1] -4.630676
##
## $BIC
## [1] -4.618433
# Model2 = MA(1) for Second Differencing
# ARIMA(0,2,1) on log prices; d = 2 means the series is differenced twice.
Model2 = sarima((log(adj_nflx)), 0,2,1)
## initial value -3.348754
## iter 2 value -3.595890
## iter 3 value -3.657550
## iter 4 value -3.722692
## iter 5 value -3.725012
## iter 6 value -3.725610
## iter 7 value -3.725624
## iter 8 value -3.725685
## iter 9 value -3.725702
## iter 10 value -3.725703
## iter 10 value -3.725703
## final value -3.725703
## converged
## initial value -3.727702
## iter 2 value -3.730423
## iter 3 value -3.730512
## iter 4 value -3.730519
## iter 4 value -3.730519
## final value -3.730519
## converged
Model2
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ma1
## -1.000
## s.e. 0.003
##
## sigma^2 estimated as 0.0005718: log likelihood = 2905.66, aic = -5807.31
##
## $degrees_of_freedom
## [1] 1256
##
## $ttable
## Estimate SE t.value p.value
## ma1 -1 0.003 -338.3381 0
##
## $AIC
## [1] -4.619979
##
## $AICc
## [1] -4.619977
##
## $BIC
## [1] -4.611807
# Model3 = Auto.arima for logged, first differencing
# forecast::auto.arima searches over (p,d,q) by information criteria; per the
# summary output it selects ARIMA(0,1,2) with drift.
Model3 <- auto.arima(log(adj_nflx))
summary(Model3)
## Series: log(adj_nflx)
## ARIMA(0,1,2) with drift
##
## Coefficients:
## ma1 ma2 drift
## -0.0787 0.0430 0.0014
## s.e. 0.0283 0.0276 0.0006
##
## sigma^2 estimated as 0.0005683: log likelihood=2916.91
## AIC=-5825.82 AICc=-5825.79 BIC=-5805.27
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 4.975572e-06 0.02380137 0.01665857 -6.499572e-05 0.2881782
## MASE ACF1
## Training set 0.9922833 0.00128343
Diagnostic Test for Model3
# Diagnostic Checking
# Residual time plot, ACF, and histogram plus a Ljung-Box test; the small
# p-value (0.0026) in the output indicates some remaining autocorrelation.
checkresiduals(Model3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2) with drift
## Q* = 21.964, df = 7, p-value = 0.002577
##
## Model df: 3. Total lags used: 10
# 30-step-ahead forecast of log prices from Model3, with 80%/95% intervals,
# plotted with the fitted history.
preds <- forecast(Model3, h = 30)
autoplot(preds)
# Backtest ARIMA model
# NTS::backtest performs rolling-origin out-of-sample evaluation (forecast
# origin 80, horizons 1..30), printing RMSE, MAE, and bias per horizon on the
# log scale — see the output below.
results <- backtest(Model3, log(adj_nflx), orig = 80, h = 30)
## [1] "RMSE of out-of-sample forecasts"
## [1] 0.02450318 0.03339095 0.04090226 0.04760317 0.05399938 0.05898188
## [7] 0.06332254 0.06752260 0.07072667 0.07382688 0.07646482 0.07891052
## [13] 0.08161427 0.08436618 0.08725607 0.08984353 0.09277125 0.09545512
## [19] 0.09855400 0.10134673 0.10382177 0.10642313 0.10855922 0.11083121
## [25] 0.11284425 0.11497889 0.11711686 0.11913397 0.12102913 0.12294952
## [1] "Mean absolute error of out-of-sample forecasts"
## [1] 0.01726316 0.02443677 0.03013491 0.03524700 0.03985101 0.04409672
## [7] 0.04734963 0.05052035 0.05305025 0.05589673 0.05807326 0.06013008
## [13] 0.06244413 0.06496971 0.06735709 0.06992802 0.07247516 0.07473823
## [19] 0.07695234 0.07922764 0.08113685 0.08311863 0.08479285 0.08695604
## [25] 0.08855427 0.09029957 0.09217862 0.09391302 0.09527088 0.09680530
## [1] "Bias of out-of-sample forecasts"
## [1] 0.001277000 0.002507773 0.003792010 0.005071183 0.006359524 0.007638182
## [7] 0.008933103 0.010259864 0.011616813 0.012962110 0.014303527 0.015650820
## [13] 0.016971505 0.018271686 0.019583302 0.020906558 0.022232131 0.023552783
## [19] 0.024839846 0.026151707 0.027458753 0.028753056 0.030059930 0.031368503
## [25] 0.032646930 0.033954124 0.035292652 0.036624957 0.037960275 0.039282467
print(paste("Average RMSE: ", round(exp(mean(results$rmse)), digits = 3)))
## [1] "Average RMSE: 1.089"
print(paste("Average MAE: ", round(exp(mean(results$mabso)), digits = 3)))
## [1] "Average MAE: 1.068"
Predicted Prices for Next 30 Days
# Convert the forecast object to a data frame and back-transform every column
# (point forecast plus 80%/95% bounds) from log prices to price levels.
preds_df = data.frame(preds)
predicted_prices <- exp(preds_df)
predicted_prices
## Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 1260 681.6063 661.0972 702.7515 650.4914 714.2094
## 1261 682.4630 654.6942 711.4095 640.4546 727.2267
## 1262 683.4341 649.5005 719.1405 632.2247 738.7914
## 1263 684.4065 645.3018 725.8811 625.5137 748.8443
## 1264 685.3804 641.7353 731.9938 619.7676 757.9394
## 1265 686.3557 638.6161 737.6640 614.7035 766.3598
## 1266 687.3323 635.8344 743.0012 610.1538 774.2731
## 1267 688.3103 633.3189 748.0768 606.0096 781.7882
## 1268 689.2898 631.0201 752.9401 602.1953 788.9805
## 1269 690.2706 628.9022 757.6272 598.6562 795.9050
## 1270 691.2528 626.9382 762.1651 595.3510 802.6028
## 1271 692.2364 625.1070 766.5747 592.2477 809.1061
## 1272 693.2214 623.3921 770.8727 589.3208 815.4403
## 1273 694.2078 621.7798 775.0726 586.5497 821.6260
## 1274 695.1956 620.2592 779.1855 583.9176 827.6801
## 1275 696.1848 618.8209 783.2207 581.4103 833.6168
## 1276 697.1755 617.4572 787.1860 579.0159 839.4479
## 1277 698.1675 616.1613 791.0881 576.7242 845.1837
## 1278 699.1610 614.9276 794.9326 574.5265 850.8329
## 1279 700.1558 613.7511 798.7247 572.4152 856.4032
## 1280 701.1521 612.6273 802.4687 570.3836 861.9012
## 1281 702.1498 611.5525 806.1685 568.4258 867.3328
## 1282 703.1489 610.5232 809.8274 566.5366 872.7033
## 1283 704.1494 609.5364 813.4485 564.7115 878.0172
## 1284 705.1514 608.5893 817.0346 562.9463 883.2787
## 1285 706.1548 607.6796 820.5880 561.2372 888.4917
## 1286 707.1596 606.8050 824.1111 559.5809 893.6593
## 1287 708.1659 605.9634 827.6058 557.9744 898.7848
## 1288 709.1735 605.1532 831.0740 556.4148 903.8708
## 1289 710.1826 604.3726 834.5173 554.8996 908.9201
GARCH Models - Look at Volatility of Stock Prices
GARCH Model1 with ARMA order (1,1)
# Standard GARCH(1,1) variance with an ARMA(1,1) conditional mean and
# Student-t innovations, fit to the adjusted close.
# NOTE(review): the model is fit to price LEVELS, not returns; the ar1
# estimate of 1.000000 in the output reflects the unit root in prices.
# Fitting GARCH to log returns is the conventional approach — confirm intent.
nflx_garch <- ugarchspec(variance.model = list(model="sGARCH",garchOrder=c(1,1)), mean.model = list(armaOrder=c(1,1)), distribution.model = "std")
nflx_garch1 <-ugarchfit(spec=nflx_garch, data=adj_nflx)
## Warning in arima(data, order = c(modelinc[2], 0, modelinc[3]), include.mean =
## modelinc[1], : possible convergence problem: optim gave code = 1
# Print the full rugarch fit report (parameters, Ljung-Box/ARCH-LM tests,
# stability and goodness-of-fit diagnostics).
nflx_garch1
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(1,0,1)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 113.690205 4.384903 25.9276 0.000000
## ar1 1.000000 0.000796 1256.1653 0.000000
## ma1 -0.059036 0.026992 -2.1871 0.028734
## omega 0.152889 0.102623 1.4898 0.136274
## alpha1 0.081915 0.012078 6.7820 0.000000
## beta1 0.917085 0.013259 69.1661 0.000000
## shape 4.211412 0.401749 10.4827 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 113.690205 0.385913 294.6003 0.000000
## ar1 1.000000 0.000874 1143.9489 0.000000
## ma1 -0.059036 0.028456 -2.0747 0.038019
## omega 0.152889 0.129386 1.1816 0.237345
## alpha1 0.081915 0.016830 4.8674 0.000001
## beta1 0.917085 0.020337 45.0946 0.000000
## shape 4.211412 0.421497 9.9916 0.000000
##
## LogLikelihood : -4224.874
##
## Information Criteria
## ------------------------------------
##
## Akaike 6.7226
## Bayes 6.7512
## Shibata 6.7225
## Hannan-Quinn 6.7333
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.6016 0.4380
## Lag[2*(p+q)+(p+q)-1][5] 3.3735 0.2619
## Lag[4*(p+q)+(p+q)-1][9] 6.9326 0.1346
## d.o.f=2
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.170 0.6801
## Lag[2*(p+q)+(p+q)-1][5] 1.073 0.8429
## Lag[4*(p+q)+(p+q)-1][9] 1.499 0.9556
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.6494 0.500 2.000 0.4203
## ARCH Lag[5] 0.6968 1.440 1.667 0.8244
## ARCH Lag[7] 0.8238 2.315 1.543 0.9404
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 7.6532
## Individual Statistics:
## mu 0.0005036
## ar1 0.7415155
## ma1 0.2570143
## omega 0.5953135
## alpha1 1.2333304
## beta1 0.8482511
## shape 1.0509154
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.69 1.9 2.35
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.4964 0.6197
## Negative Sign Bias 0.4132 0.6795
## Positive Sign Bias 1.1354 0.2564
## Joint Effect 1.5176 0.6782
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 26.29 0.1223
## 2 30 27.14 0.5642
## 3 40 35.07 0.6495
## 4 50 51.37 0.3812
##
##
## Elapsed time : 0.3054609
GARCH MODEL2 WITH ARMA (2,2)
# Same sGARCH(1,1) variance but with an ARMA(2,2) conditional mean, again fit
# to price levels with Student-t innovations.
# NOTE(review): nflx_garch2 first holds the spec, then is overwritten by the
# fit. This works because the spec is consumed before the reassignment, but
# separate names (as in model 1) would be clearer.
nflx_garch2 <- ugarchspec(variance.model = list(model="sGARCH",garchOrder=c(1,1)), mean.model = list(armaOrder=c(2,2)), distribution.model = "std")
nflx_garch2 <-ugarchfit(spec=nflx_garch2, data=adj_nflx)
nflx_garch2
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(2,0,2)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 113.204458 0.410537 275.7470 0.000000
## ar1 0.022298 0.004577 4.8720 0.000001
## ar2 0.981119 0.004625 212.1114 0.000000
## ma1 0.915764 0.000015 60943.5104 0.000000
## ma2 -0.070207 0.000592 -118.5324 0.000000
## omega 0.155211 0.103317 1.5023 0.133023
## alpha1 0.082929 0.012137 6.8326 0.000000
## beta1 0.916071 0.013341 68.6661 0.000000
## shape 4.166105 0.391210 10.6493 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 113.204458 0.294881 383.8983 0.000000
## ar1 0.022298 0.005400 4.1294 0.000036
## ar2 0.981119 0.005623 174.4833 0.000000
## ma1 0.915764 0.000013 68575.9719 0.000000
## ma2 -0.070207 0.000415 -169.3417 0.000000
## omega 0.155211 0.129171 1.2016 0.229520
## alpha1 0.082929 0.016732 4.9561 0.000001
## beta1 0.916071 0.020324 45.0734 0.000000
## shape 4.166105 0.413512 10.0749 0.000000
##
## LogLikelihood : -4221.8
##
## Information Criteria
## ------------------------------------
##
## Akaike 6.7209
## Bayes 6.7576
## Shibata 6.7208
## Hannan-Quinn 6.7347
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.9913 3.194e-01
## Lag[2*(p+q)+(p+q)-1][11] 8.9600 8.242e-06
## Lag[4*(p+q)+(p+q)-1][19] 13.9519 5.984e-02
## d.o.f=4
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.1380 0.7103
## Lag[2*(p+q)+(p+q)-1][5] 0.9734 0.8657
## Lag[4*(p+q)+(p+q)-1][9] 1.3741 0.9652
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.5714 0.500 2.000 0.4497
## ARCH Lag[5] 0.6049 1.440 1.667 0.8523
## ARCH Lag[7] 0.7411 2.315 1.543 0.9517
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 7.3642
## Individual Statistics:
## mu 0.01413
## ar1 0.05526
## ar2 0.03545
## ma1 0.12500
## ma2 0.08744
## omega 0.60695
## alpha1 1.29812
## beta1 0.87017
## shape 1.07678
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 2.1 2.32 2.82
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.7645 0.4447
## Negative Sign Bias 0.2577 0.7967
## Positive Sign Bias 1.2745 0.2027
## Joint Effect 1.8533 0.6034
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 18.16 0.5120
## 2 30 22.61 0.7940
## 3 40 31.33 0.8042
## 4 50 37.47 0.8855
##
##
## Elapsed time : 0.6291411
GARCH MODEL3 WITH ARMA ORDER (3,3)
# sGARCH(1,1) variance with an ARMA(3,3) conditional mean; the richest mean
# model of the three candidates (the spec variable is again overwritten by
# the fit, as in model 2).
nflx_garch3 <- ugarchspec(variance.model = list(model="sGARCH",garchOrder=c(1,1)), mean.model = list(armaOrder=c(3,3)), distribution.model = "std")
nflx_garch3 <-ugarchfit(spec=nflx_garch3, data=adj_nflx)
## Warning in arima(data, order = c(modelinc[2], 0, modelinc[3]), include.mean =
## modelinc[1], : possible convergence problem: optim gave code = 1
# Print the full fit report for the ARMA(3,3)-GARCH(1,1) model.
nflx_garch3
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(3,0,3)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 112.980156 1.406618 80.3205 0.00000
## ar1 1.506436 0.001629 924.9161 0.00000
## ar2 -1.357242 0.001712 -792.6000 0.00000
## ar3 0.853159 0.001952 436.9622 0.00000
## ma1 -0.572363 0.027208 -21.0362 0.00000
## ma2 0.897680 0.019185 46.7910 0.00000
## ma3 -0.037132 0.027646 -1.3431 0.17923
## omega 0.155388 0.103996 1.4942 0.13513
## alpha1 0.082432 0.012089 6.8186 0.00000
## beta1 0.916568 0.013289 68.9721 0.00000
## shape 4.197099 0.399745 10.4994 0.00000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 112.980156 0.531122 212.7198 0.000000
## ar1 1.506436 0.000627 2401.4776 0.000000
## ar2 -1.357242 0.000584 -2325.6007 0.000000
## ar3 0.853159 0.000228 3746.9702 0.000000
## ma1 -0.572363 0.028687 -19.9518 0.000000
## ma2 0.897680 0.019199 46.7564 0.000000
## ma3 -0.037132 0.030335 -1.2241 0.220926
## omega 0.155388 0.131475 1.1819 0.237252
## alpha1 0.082432 0.016700 4.9361 0.000001
## beta1 0.916568 0.020194 45.3886 0.000000
## shape 4.197099 0.425247 9.8698 0.000000
##
## LogLikelihood : -4221.973
##
## Information Criteria
## ------------------------------------
##
## Akaike 6.7243
## Bayes 6.7692
## Shibata 6.7242
## Hannan-Quinn 6.7412
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 1.031 0.3099
## Lag[2*(p+q)+(p+q)-1][17] 8.438 0.8273
## Lag[4*(p+q)+(p+q)-1][29] 16.372 0.2998
## d.o.f=6
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.1312 0.7172
## Lag[2*(p+q)+(p+q)-1][5] 0.9880 0.8624
## Lag[4*(p+q)+(p+q)-1][9] 1.3847 0.9645
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.5769 0.500 2.000 0.4475
## ARCH Lag[5] 0.6430 1.440 1.667 0.8408
## ARCH Lag[7] 0.7626 2.315 1.543 0.9488
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 8.041
## Individual Statistics:
## mu 0.01276
## ar1 0.04650
## ar2 0.03577
## ar3 0.03583
## ma1 0.52343
## ma2 0.53937
## ma3 0.12094
## omega 0.61567
## alpha1 1.23718
## beta1 0.85103
## shape 1.13167
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 2.49 2.75 3.27
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.8284 0.4076
## Negative Sign Bias 0.3194 0.7495
## Positive Sign Bias 1.3735 0.1699
## Joint Effect 2.2187 0.5283
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 15.58 0.6849
## 2 30 18.13 0.9415
## 3 40 29.61 0.8612
## 4 50 43.98 0.6764
##
##
## Elapsed time : 0.63712
Table to Show Akaike Values for the GARCH Models
GARCH(2,2) and GARCH(3,3) had lower p values for weighted Ljung-Box Test on standardized residuals.
GARCH(1,1) had the best metrics and outperformed the other two GARCH models. This is the model that will be used.
# Collect the Akaike values reported by the three GARCH fits into a small
# comparison table (values transcribed from the fit outputs above).
GARCH_Models = c('GARCH(1,1)', 'GARCH(2,2)','GARCH(3,3)')
# FIX: store the AIC values as numeric rather than character strings so they
# can be compared/sorted arithmetically; the printed table is unchanged.
Akaike = c(6.722, 6.721, 6.724)
gtable = data.frame(GARCH_Models, Akaike)
gtable
## GARCH_Models Akaike
## 1 GARCH(1,1) 6.722
## 2 GARCH(2,2) 6.721
## 3 GARCH(3,3) 6.724
# Render the comparison table as styled HTML via kableExtra.
# FIX: the original caption was '<b>Garch Model - Akaike Values<b>' — the
# closing bold tag was missing its slash, producing malformed HTML.
kable(gtable, format = 'html', caption = '<b>Garch Model - Akaike Values</b>',position = 'center', align = 'cc') %>%
kable_styling(full_width = TRUE, position = 'center')%>%
kable_classic(html_font = 'Times New Roman')
| GARCH_Models | Akaike |
|---|---|
| GARCH(1,1) | 6.722 |
| GARCH(2,2) | 6.721 |
| GARCH(3,3) | 6.724 |
FORECASTING
Forecasting with bootstrap forecast to forecast both series and conditional variances.
nflx_garch1 with arma order (1,1)
# Bootstrap forecast 30 steps ahead from the ARMA(1,1)-GARCH(1,1) fit;
# method "Partial" is selected from the two-element vector (per the rugarch
# docs this resamples innovations without bootstrapping parameter
# uncertainty — confirm against ugarchboot documentation).
nflx_predict <- ugarchboot(nflx_garch1, n.ahead=30,method=c("Partial","Full")[1])
nflx_predict
##
## *-----------------------------------*
## * GARCH Bootstrap Forecast *
## *-----------------------------------*
## Model : sGARCH
## n.ahead : 30
## Bootstrap method: partial
## Date (T[0]): 2021-11-15
##
## Series (summary):
## min q.25 mean q.75 max forecast[analytic]
## t+1 586.87 673.93 681.20 689.12 757.12 679.43
## t+2 560.49 672.15 681.20 690.66 804.67 679.43
## t+3 580.38 670.32 681.49 692.11 831.02 679.43
## t+4 570.78 668.73 682.70 696.55 825.64 679.43
## t+5 550.79 666.31 683.22 699.21 840.86 679.43
## t+6 568.16 665.27 685.05 701.84 855.92 679.43
## t+7 560.54 664.21 685.89 704.98 895.48 679.43
## t+8 570.64 663.98 688.38 709.04 901.18 679.43
## t+9 567.82 663.15 689.66 713.30 885.20 679.43
## t+10 573.34 664.71 692.29 715.24 900.84 679.43
## .....................
##
## Sigma (summary):
## min q0.25 mean q0.75 max forecast[analytic]
## t+1 12.9669 12.967 12.967 12.967 12.967 12.967
## t+2 12.4239 12.472 13.003 12.974 29.260 12.966
## t+3 11.9041 12.081 13.015 13.099 34.827 12.966
## t+4 11.4087 11.796 12.973 13.186 34.671 12.965
## t+5 10.9379 11.550 13.004 13.436 33.220 12.965
## t+6 10.5004 11.283 13.000 13.644 32.104 12.964
## t+7 10.1710 11.127 13.068 13.710 43.231 12.963
## t+8 9.7792 11.005 13.135 13.988 43.014 12.963
## t+9 9.4474 11.011 13.279 14.015 42.372 12.962
## t+10 9.1343 10.829 13.195 14.041 41.803 12.962
## .....................
plot(nflx_predict,which=2)
Predicted Prices by GARCH Models
# Summarize the bootstrapped series paths per horizon (min, quartiles, mean,
# max across bootstrap replications — see the printed columns t+1..t+30).
nflx_predict_df <- as.data.frame(nflx_predict, which="series", type="summary")
nflx_predict_df
## t+1 t+2 t+3 t+4 t+5 t+6 t+7 t+8
## min 586.8727 560.4925 580.3832 570.7837 550.7910 568.1595 560.5356 570.6396
## q.25 673.9271 672.1545 670.3239 668.7288 666.3128 665.2665 664.2135 663.9809
## mean 681.2002 681.2022 681.4897 682.6959 683.2213 685.0502 685.8937 688.3813
## q.75 689.1195 690.6584 692.1118 696.5533 699.2119 701.8359 704.9803 709.0429
## max 757.1204 804.6695 831.0242 825.6447 840.8649 855.9191 895.4823 901.1796
## t+9 t+10 t+11 t+12 t+13 t+14 t+15 t+16
## min 567.8193 573.3405 581.1976 579.4033 554.8548 531.9298 524.0622 515.8633
## q.25 663.1464 664.7124 666.8361 665.6952 666.6221 664.9383 665.3408 666.4584
## mean 689.6614 692.2884 693.5479 694.1471 694.9478 696.0688 697.4145 698.7719
## q.75 713.2974 715.2414 717.7133 717.8888 719.1906 720.7093 723.6458 728.1678
## max 885.1973 900.8435 905.2487 937.0410 1012.6243 1050.7914 990.1137 1002.6399
## t+17 t+18 t+19 t+20 t+21 t+22 t+23
## min 504.2789 502.8419 489.0520 506.7233 512.1404 503.1369 464.9144
## q.25 666.8987 664.6277 666.8792 666.1957 665.1895 664.2369 664.9360
## mean 699.3504 700.1285 701.2160 703.2033 704.6703 705.9801 707.8804
## q.75 727.6503 729.8916 731.0161 733.9533 736.7755 741.7456 740.8535
## max 1000.0113 1031.0192 1012.8966 1029.0389 1043.9284 1042.4031 1029.1598
## t+24 t+25 t+26 t+27 t+28 t+29 t+30
## min 465.0879 488.6456 512.2124 525.3471 485.6206 480.6989 475.4839
## q.25 666.1787 666.6367 664.9942 669.8717 668.7352 665.9173 667.8820
## mean 708.6575 710.0897 711.8356 713.1691 713.9477 715.1216 716.8875
## q.75 741.7617 744.4483 750.6790 748.7896 750.7050 754.1577 755.7193
## max 1049.7215 1087.4207 1127.5932 1093.9125 1217.3089 1301.9505 1335.2386
# Transpose so horizons t+1..t+30 become rows and the summary statistics
# become columns, which reads more naturally as a forecast table.
pred_table <- as.data.frame(t(nflx_predict_df))
pred_table
## min q.25 mean q.75 max
## t+1 586.8727 673.9271 681.2002 689.1195 757.1204
## t+2 560.4925 672.1545 681.2022 690.6584 804.6695
## t+3 580.3832 670.3239 681.4897 692.1118 831.0242
## t+4 570.7837 668.7288 682.6959 696.5533 825.6447
## t+5 550.7910 666.3128 683.2213 699.2119 840.8649
## t+6 568.1595 665.2665 685.0502 701.8359 855.9191
## t+7 560.5356 664.2135 685.8937 704.9803 895.4823
## t+8 570.6396 663.9809 688.3813 709.0429 901.1796
## t+9 567.8193 663.1464 689.6614 713.2974 885.1973
## t+10 573.3405 664.7124 692.2884 715.2414 900.8435
## t+11 581.1976 666.8361 693.5479 717.7133 905.2487
## t+12 579.4033 665.6952 694.1471 717.8888 937.0410
## t+13 554.8548 666.6221 694.9478 719.1906 1012.6243
## t+14 531.9298 664.9383 696.0688 720.7093 1050.7914
## t+15 524.0622 665.3408 697.4145 723.6458 990.1137
## t+16 515.8633 666.4584 698.7719 728.1678 1002.6399
## t+17 504.2789 666.8987 699.3504 727.6503 1000.0113
## t+18 502.8419 664.6277 700.1285 729.8916 1031.0192
## t+19 489.0520 666.8792 701.2160 731.0161 1012.8966
## t+20 506.7233 666.1957 703.2033 733.9533 1029.0389
## t+21 512.1404 665.1895 704.6703 736.7755 1043.9284
## t+22 503.1369 664.2369 705.9801 741.7456 1042.4031
## t+23 464.9144 664.9360 707.8804 740.8535 1029.1598
## t+24 465.0879 666.1787 708.6575 741.7617 1049.7215
## t+25 488.6456 666.6367 710.0897 744.4483 1087.4207
## t+26 512.2124 664.9942 711.8356 750.6790 1127.5932
## t+27 525.3471 669.8717 713.1691 748.7896 1093.9125
## t+28 485.6206 668.7352 713.9477 750.7050 1217.3089
## t+29 480.6989 665.9173 715.1216 754.1577 1301.9505
## t+30 475.4839 667.8820 716.8875 755.7193 1335.2386